#Packages applied
library("twitteR")
## Warning: package 'twitteR' was built under R version 4.2.2
library(tm)
## Warning: package 'tm' was built under R version 4.2.2
## Loading required package: NLP
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:twitteR':
##
## id, location
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("plotly")
## Warning: package 'plotly' was built under R version 4.2.2
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
##
## annotate
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
library(RColorBrewer)
library(tidytext)
## Warning: package 'tidytext' was built under R version 4.2.2
library(stringr)
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.2.2
library(rtweet)
## Warning: package 'rtweet' was built under R version 4.2.2
##
## Attaching package: 'rtweet'
## The following object is masked from 'package:twitteR':
##
## lookup_statuses
library(corpus)
## Warning: package 'corpus' was built under R version 4.2.2
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
##
## extract
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 4.2.2
library(wordcloud2)
## Warning: package 'wordcloud2' was built under R version 4.2.2
library(syuzhet)
## Warning: package 'syuzhet' was built under R version 4.2.2
##
## Attaching package: 'syuzhet'
## The following object is masked from 'package:rtweet':
##
## get_tokens
## 1. Extract from Twitter using your developer's credentials. Choose any keyword you want.
# SECURITY: never commit API credentials to source control — the previous
# hard-coded keys/tokens are compromised and must be rotated. Read them from
# environment variables instead (e.g. set TWITTER_* in your ~/.Renviron).
CONSUMER_KEY <- Sys.getenv("TWITTER_CONSUMER_KEY")
CONSUMER_SECRET <- Sys.getenv("TWITTER_CONSUMER_SECRET")
ACCESS_TOKEN <- Sys.getenv("TWITTER_ACCESS_TOKEN")
ACCESS_SECRET <- Sys.getenv("TWITTER_ACCESS_SECRET")
# Authenticate against the Twitter API (twitteR OAuth handshake).
setup_twitter_oauth(consumer_key = CONSUMER_KEY,
consumer_secret = CONSUMER_SECRET,
access_token = ACCESS_TOKEN,
access_secret = ACCESS_SECRET)
## [1] "Using direct authentication"
# Get up to 10000 observations, excluding retweets, for the chosen hashtag
# within the given date window (retryOnRateLimit waits out API rate limits).
TrendTweets <- searchTwitter("#wednesdaynetflix -filter:retweets",
n = 10000,
lang = "en",
since = "2022-11-23",
until = "2022-11-30",
retryOnRateLimit = 120)
# Convert the list of status objects to a data frame and cache it to disk
# so later runs can skip the (slow, rate-limited) API extraction.
TrendingTweetsDF <- twListToDF(TrendTweets)
save(TrendingTweetsDF, file = "TrendingTweetDF.Rdata")
# Or, alternatively: skip the API extraction above and load previously saved data.
# Existing data file: set the working folder that holds the cached .Rdata.
# NOTE(review): setwd() with an absolute path makes the script non-portable;
# prefer project-relative paths (e.g. RStudio projects or the `here` package).
setwd("C:/CS101_DATA_SCIENCE/Nalaza_Repo/Individual Project/Individual Project 1")
# Load the cached data frame (restores the object `TrendingTweetsDF`).
load(file = "TrendingTweetDF.Rdata")
# BUG FIX: every downstream step (plots, summaries, wordcloud) references
# `tweetsDF`, but only `TrendingTweetsDF` exists after load(). Alias it so the
# rest of the script actually finds its data.
tweetsDF <- TrendingTweetsDF
# Audit missing values per column; only geo fields and reply fields have NAs.
sapply(TrendingTweetsDF, function(x) sum(is.na(x)))
## text favorited favoriteCount replyToSN created
## 0 0 0 9285 0
## truncated replyToSID id replyToUID statusSource
## 0 9434 0 9285 0
## screenName retweetCount isRetweet retweeted longitude
## 0 0 0 0 10000
## latitude
## 10000
# Plot tweet volume over time: histogram of creation timestamps.
# FIXES: removed the stray `fill = tweetsDF` argument — it was passed to
# ggplot() outside aes(), where a whole data frame is meaningless; and the
# deprecated `..count..` pronoun is replaced by after_stat(count).
ggplot(data = tweetsDF, aes(x = created)) +
geom_histogram(aes(fill = after_stat(count))) +
theme(legend.position = "right",
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1)) +
xlab("Time") + ylab("Number of tweets") +
scale_fill_gradient(low = "black", high = "midnightblue") +
ggtitle("Trendtweets #wednesdaynetflix")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Summary of the date range covered by the data.
# FIX: `group_by(1)` grouped on the constant 1, which adds a useless `1`
# column to the output; a plain summarise() over the whole frame is what
# was intended.
tweetsDF %>%
summarise(max = max(created), min = min(created))
## # A tibble: 1 × 2
## max min
## <dttm> <dttm>
## 1 2022-11-29 23:59:50 2022-11-27 21:08:01
# Round each timestamp to the nearest hour for per-hour aggregation.
# Explicit assignment replaces magrittr's compound pipe `%<>%` — same
# behavior, but the mutation of tweetsDF is visible at a glance.
tweetsDF <- tweetsDF %>%
mutate(Created_At_Round = created %>%
round(units = 'hours') %>%
as.POSIXct())
# Sanity check: earliest and latest tweet times in the data set.
tweetsDF %>% pull(created) %>% min()
## [1] "2022-11-27 21:08:01 UTC"
tweetsDF %>% pull(created) %>% max()
## [1] "2022-11-29 23:59:50 UTC"
# Interactive line chart of tweet counts per hour.
# Count tweets within each rounded hour, then build the ggplot from the
# pre-computed counts.
hourly_counts <- tweetsDF %>%
dplyr::count(Created_At_Round)
plt <- ggplot(hourly_counts, aes(x = Created_At_Round, y = n)) +
theme_light() +
geom_line() +
xlab('Date') +
ylab(NULL) +
ggtitle("Number of Tweets per Hour")
# Hand the static plot to plotly for an interactive rendering.
ggplotly(plt)
# Frequency of tweets per hour via rtweet::ts_plot, rendered interactively.
# FIX: corrected the typo in the user-facing title
# ("wendesdaynetflix" -> "wednesdaynetflix").
Frequency <- ts_plot(tweetsDF, "hours") +
labs(x = NULL, y = NULL,
title = "Frequency of tweets with a #wednesdaynetflix hashtag",
subtitle = paste0(format(min(tweetsDF$created), "%d %B %Y"), " to ",
format(max(tweetsDF$created), "%d %B %Y")),
caption = "Data collected from Twitter's REST API via twitteR") +
theme_minimal()
ggplotly(Frequency)
#Plot a graph (any graph you want)based on the type of device - #found in Source - that the user use. Include the legends.
#The encode source of tweets
# NOTE(review): the column `tweetSource` is never created anywhere in this
# file — the step that derives it from `statusSource` (and buckets rare
# sources into 'others') appears to be missing. This plot will fail until
# that column exists; confirm against the original analysis.
#Plotting the tweet source
# Bar chart of tweet counts per source/device, excluding the 'others' bucket,
# with one fill color (and legend entry) per source.
ggplot(tweetsDF[tweetsDF$tweetSource != 'others',], aes(tweetSource, fill = tweetSource)) +
geom_bar() +
theme(legend.position="right",
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1)) +
ylab("Number of tweets") +
ggtitle("Tweets by Source")
# Create a wordcloud from screenName.
# FIXES: removed the unused tm::Corpus object (`namesCorpus`) — wordcloud2
# takes a plain word/frequency data frame, so the corpus was dead code; and
# corrected shape = 'Diamond' to 'diamond' — wordcloud2 shape names are
# lowercase, and unknown values silently fall back to the default circle.
# Count tweets per screen name, most active accounts first.
TweetCloud <- tweetsDF %>%
select(screenName) %>%
group_by(screenName) %>%
summarize(count = n()) %>%
arrange(desc(count))
# Render the interactive wordcloud with wordcloud2().
wordcloud2(TweetCloud,
size = 2,
color = 'random-dark',
shape = 'diamond')